#define PETSCVEC_DLL

/*
     Some useful vector utility functions.
*/
#include "private/vecimpl.h"          /*I "petscvec.h" I*/

extern MPI_Op VecMax_Local_Op;
extern MPI_Op VecMin_Local_Op;

#undef __FUNCT__  
#define __FUNCT__ "VecStrideScale"
/*@
   VecStrideScale - Scales a subvector of a vector defined 
   by a starting point and a stride.

   Collective on Vec

   Input Parameter:
+  v - the vector 
.  start - starting point of the subvector (defined by a stride)
-  scale - value to multiply each subvector entry by

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   This will only work if the desire subvector is a stride subvector

   Level: advanced

   Concepts: scale^on stride of vector
   Concepts: stride^scale

.seealso: VecNorm(), VecStrideGather(), VecStrideScatter(), VecStrideMin(), VecStrideMax(), VecStrideScale()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideScale(Vec v,PetscInt start,PetscScalar scale)
{
  PetscErrorCode ierr;
  PetscInt       i,n,bs;
  PetscScalar    *x;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (start < 0) {
    SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative start %D",start);
  } else if (start >= bs) {
    SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Start of stride subvector (%D) is too large for stride\n\
            Have you set the vector blocksize (%D) correctly with VecSetBlockSize()?",start,bs);
  }
  x += start;

  for (i=0; i<n; i+=bs) {
    x[i] *= scale;
  }
  x -= start;

  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideNorm"
/*@
   VecStrideNorm - Computes the norm of subvector of a vector defined 
   by a starting point and a stride.

   Collective on Vec

   Input Parameter:
+  v - the vector 
.  start - starting point of the subvector (defined by a stride)
-  ntype - type of norm, one of NORM_1, NORM_2, NORM_INFINITY

   Output Parameter:
.  norm - the norm

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   If x is the array representing the vector x then this computes the norm 
   of the array (x[start],x[start+stride],x[start+2*stride], ....)

   This is useful for computing, say the norm of the pressure variable when
   the pressure is stored (interlaced) with other variables, say density etc.

   This will only work if the desire subvector is a stride subvector

   Level: advanced

   Concepts: norm^on stride of vector
   Concepts: stride^norm

.seealso: VecNorm(), VecStrideGather(), VecStrideScatter(), VecStrideMin(), VecStrideMax()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideNorm(Vec v,PetscInt start,NormType ntype,PetscReal *nrm)
{
  PetscErrorCode ierr;
  PetscInt       i,n,bs;
  PetscScalar    *x;
  PetscReal      tnorm;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidDoublePointer(nrm,3);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)v,&comm);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (start < 0) {
    SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative start %D",start);
  } else if (start >= bs) {
    SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Start of stride subvector (%D) is too large for stride\n\
            Have you set the vector blocksize (%D) correctly with VecSetBlockSize()?",start,bs);
  }
  x += start;

  if (ntype == NORM_2) {
    PetscScalar sum = 0.0;
    for (i=0; i<n; i+=bs) {
      sum += x[i]*(PetscConj(x[i]));
    }
    tnorm  = PetscRealPart(sum);
    ierr   = MPI_Allreduce(&tnorm,nrm,1,MPIU_REAL,MPI_SUM,comm);CHKERRQ(ierr);
    *nrm = sqrt(*nrm);
  } else if (ntype == NORM_1) {
    tnorm = 0.0;
    for (i=0; i<n; i+=bs) {
      tnorm += PetscAbsScalar(x[i]);
    }
    ierr   = MPI_Allreduce(&tnorm,nrm,1,MPIU_REAL,MPI_SUM,comm);CHKERRQ(ierr);
  } else if (ntype == NORM_INFINITY) {
    PetscReal tmp;
    tnorm = 0.0;

    for (i=0; i<n; i+=bs) {
      if ((tmp = PetscAbsScalar(x[i])) > tnorm) tnorm = tmp;
      /* check special case of tmp == NaN */
      if (tmp != tmp) {tnorm = tmp; break;}
    } 
    ierr   = MPI_Allreduce(&tnorm,nrm,1,MPIU_REAL,MPI_MAX,comm);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown norm type");
  }

  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideMax"
/*@
   VecStrideMax - Computes the maximum of subvector of a vector defined 
   by a starting point and a stride and optionally its location.

   Collective on Vec

   Input Parameter:
+  v - the vector 
-  start - starting point of the subvector (defined by a stride)

   Output Parameter:
+  index - the location where the maximum occurred  (pass PETSC_NULL if not required)
-  nrm - the max

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   If xa is the array representing the vector x, then this computes the max
   of the array (xa[start],xa[start+stride],xa[start+2*stride], ....)

   This is useful for computing, say the maximum of the pressure variable when
   the pressure is stored (interlaced) with other variables, e.g., density, etc.
   This will only work if the desire subvector is a stride subvector.

   Level: advanced

   Concepts: maximum^on stride of vector
   Concepts: stride^maximum

.seealso: VecMax(), VecStrideNorm(), VecStrideGather(), VecStrideScatter(), VecStrideMin()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideMax(Vec v,PetscInt start,PetscInt *idex,PetscReal *nrm)
{
  PetscErrorCode ierr;
  PetscInt       i,n,bs,id;
  PetscScalar    *x;
  PetscReal      max,tmp;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidDoublePointer(nrm,3);

  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)v,&comm);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (start < 0) {
    SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative start %D",start);
  } else if (start >= bs) {
    SETERRQ2(PETSC_ERR_ARG_WRONG,"Start of stride subvector (%D) is too large for stride\n\
            Have you set the vector blocksize (%D) correctly with VecSetBlockSize()?",start,bs);
  }
  x += start;

  id = -1;
  if (!n) {
    max = PETSC_MIN;
  } else {
    id  = 0;
#if defined(PETSC_USE_COMPLEX)
    max = PetscRealPart(x[0]);
#else
    max = x[0];
#endif
    for (i=bs; i<n; i+=bs) {
#if defined(PETSC_USE_COMPLEX)
      if ((tmp = PetscRealPart(x[i])) > max) { max = tmp; id = i;}
#else
      if ((tmp = x[i]) > max) { max = tmp; id = i;} 
#endif
    }
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);

  if (!idex) {
    ierr   = MPI_Allreduce(&max,nrm,1,MPIU_REAL,MPI_MAX,comm);CHKERRQ(ierr);
  } else {
    PetscReal in[2],out[2];
    PetscInt  rstart;

    ierr  = VecGetOwnershipRange(v,&rstart,PETSC_NULL);CHKERRQ(ierr);
    in[0] = max;
    in[1] = rstart+id+start;
    ierr  = MPI_Allreduce(in,out,2,MPIU_REAL,VecMax_Local_Op,((PetscObject)v)->comm);CHKERRQ(ierr);
    *nrm  = out[0];
    *idex = (PetscInt)out[1];
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideMin"
/*@
   VecStrideMin - Computes the minimum of subvector of a vector defined 
   by a starting point and a stride and optionally its location.

   Collective on Vec

   Input Parameter:
+  v - the vector 
-  start - starting point of the subvector (defined by a stride)

   Output Parameter:
+  idex - the location where the minimum occurred. (pass PETSC_NULL if not required)
-  nrm - the min

   Level: advanced

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   If xa is the array representing the vector x, then this computes the min
   of the array (xa[start],xa[start+stride],xa[start+2*stride], ....)

   This is useful for computing, say the minimum of the pressure variable when
   the pressure is stored (interlaced) with other variables, e.g., density, etc.
   This will only work if the desire subvector is a stride subvector.

   Concepts: minimum^on stride of vector
   Concepts: stride^minimum

.seealso: VecMin(), VecStrideNorm(), VecStrideGather(), VecStrideScatter(), VecStrideMax()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideMin(Vec v,PetscInt start,PetscInt *idex,PetscReal *nrm)
{
  PetscErrorCode ierr;
  PetscInt       i,n,bs,id;
  PetscScalar    *x;
  PetscReal      min,tmp;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidDoublePointer(nrm,4);

  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)v,&comm);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (start < 0) {
    SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative start %D",start);
  } else if (start >= bs) {
    SETERRQ2(PETSC_ERR_ARG_WRONG,"Start of stride subvector (%D) is too large for stride\n\
            Have you set the vector blocksize (%D) correctly with VecSetBlockSize()?",start,bs);
  }
  x += start;

  id = -1;
  if (!n) {
    min = PETSC_MAX;
  } else {
    id = 0;
#if defined(PETSC_USE_COMPLEX)
    min = PetscRealPart(x[0]);
#else
    min = x[0];
#endif
    for (i=bs; i<n; i+=bs) {
#if defined(PETSC_USE_COMPLEX)
      if ((tmp = PetscRealPart(x[i])) < min) { min = tmp; id = i;}
#else
      if ((tmp = x[i]) < min) { min = tmp; id = i;} 
#endif
    }
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);

  if (!idex) {
    ierr   = MPI_Allreduce(&min,nrm,1,MPIU_REAL,MPI_MIN,comm);CHKERRQ(ierr);
  } else {
    PetscReal in[2],out[2];
    PetscInt  rstart;

    ierr  = VecGetOwnershipRange(v,&rstart,PETSC_NULL);CHKERRQ(ierr);
    in[0] = min;
    in[1] = rstart+id;
    ierr  = MPI_Allreduce(in,out,2,MPIU_REAL,VecMin_Local_Op,((PetscObject)v)->comm);CHKERRQ(ierr);
    *nrm  = out[0];
    *idex = (PetscInt)out[1];
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideScaleAll"
/*@
   VecStrideScaleAll - Scales the subvectors of a vector defined 
   by a starting point and a stride.

   Collective on Vec

   Input Parameter:
+  v - the vector 
-  scales - values to multiply each subvector entry by

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.


   Level: advanced

   Concepts: scale^on stride of vector
   Concepts: stride^scale

.seealso: VecNorm(), VecStrideScale(), VecScale(), VecStrideGather(), VecStrideScatter(), VecStrideMin(), VecStrideMax()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideScaleAll(Vec v,PetscScalar *scales)
{
  PetscErrorCode ierr;
  PetscInt       i,j,n,bs;
  PetscScalar    *x;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidScalarPointer(scales,2);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);

  bs   = v->map->bs;

  /* need to provide optimized code for each bs */
  for (i=0; i<n; i+=bs) {
    for (j=0; j<bs; j++) {
      x[i+j] *= scales[j];
    }
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideNormAll"
/*@
   VecStrideNormAll - Computes the norms  subvectors of a vector defined 
   by a starting point and a stride.

   Collective on Vec

   Input Parameter:
+  v - the vector 
-  ntype - type of norm, one of NORM_1, NORM_2, NORM_INFINITY

   Output Parameter:
.  nrm - the norms

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   If x is the array representing the vector x then this computes the norm 
   of the array (x[start],x[start+stride],x[start+2*stride], ....)

   This is useful for computing, say the norm of the pressure variable when
   the pressure is stored (interlaced) with other variables, say density etc.

   This will only work if the desire subvector is a stride subvector

   Level: advanced

   Concepts: norm^on stride of vector
   Concepts: stride^norm

.seealso: VecNorm(), VecStrideGather(), VecStrideScatter(), VecStrideMin(), VecStrideMax()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideNormAll(Vec v,NormType ntype,PetscReal nrm[])
{
  PetscErrorCode ierr;
  PetscInt       i,j,n,bs;
  PetscScalar    *x;
  PetscReal      tnorm[128];
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidDoublePointer(nrm,3);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)v,&comm);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (bs > 128) SETERRQ(PETSC_ERR_SUP,"Currently supports only blocksize up to 128");

  if (ntype == NORM_2) {
    PetscScalar sum[128];
    for (j=0; j<bs; j++) sum[j] = 0.0;
    for (i=0; i<n; i+=bs) {
      for (j=0; j<bs; j++) {
	sum[j] += x[i+j]*(PetscConj(x[i+j]));
      }
    }
    for (j=0; j<bs; j++) {
      tnorm[j]  = PetscRealPart(sum[j]);
    }
    ierr   = MPI_Allreduce(tnorm,nrm,bs,MPIU_REAL,MPI_SUM,comm);CHKERRQ(ierr);
    for (j=0; j<bs; j++) {
      nrm[j] = sqrt(nrm[j]);
    }
  } else if (ntype == NORM_1) {
    for (j=0; j<bs; j++) {
      tnorm[j] = 0.0;
    }
    for (i=0; i<n; i+=bs) {
      for (j=0; j<bs; j++) {
	tnorm[j] += PetscAbsScalar(x[i+j]);
      }
    }
    ierr   = MPI_Allreduce(tnorm,nrm,bs,MPIU_REAL,MPI_SUM,comm);CHKERRQ(ierr);
  } else if (ntype == NORM_INFINITY) {
    PetscReal tmp;
    for (j=0; j<bs; j++) {
      tnorm[j] = 0.0;
    }

    for (i=0; i<n; i+=bs) {
      for (j=0; j<bs; j++) {
	if ((tmp = PetscAbsScalar(x[i+j])) > tnorm[j]) tnorm[j] = tmp;
	/* check special case of tmp == NaN */
	if (tmp != tmp) {tnorm[j] = tmp; break;}
      }
    } 
    ierr   = MPI_Allreduce(tnorm,nrm,bs,MPIU_REAL,MPI_MAX,comm);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown norm type");
  }

  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideMaxAll"
/*@
   VecStrideMaxAll - Computes the maximums of subvectors of a vector defined 
   by a starting point and a stride and optionally its location.

   Collective on Vec

   Input Parameter:
.  v - the vector 

   Output Parameter:
+  index - the location where the maximum occurred (not supported, pass PETSC_NULL,
           if you need this, send mail to petsc-maint@mcs.anl.gov to request it)
-  nrm - the maximums

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   This is useful for computing, say the maximum of the pressure variable when
   the pressure is stored (interlaced) with other variables, e.g., density, etc.
   This will only work if the desire subvector is a stride subvector.

   Level: advanced

   Concepts: maximum^on stride of vector
   Concepts: stride^maximum

.seealso: VecMax(), VecStrideNorm(), VecStrideGather(), VecStrideScatter(), VecStrideMin()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideMaxAll(Vec v,PetscInt idex[],PetscReal nrm[])
{
  PetscErrorCode ierr;
  PetscInt       i,j,n,bs;
  PetscScalar    *x;
  PetscReal      max[128],tmp;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidDoublePointer(nrm,3);
  if (idex) {
    SETERRQ(PETSC_ERR_SUP,"No support yet for returning index; send mail to petsc-maint@mcs.anl.gov asking for it");
  }
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)v,&comm);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (bs > 128) SETERRQ(PETSC_ERR_SUP,"Currently supports only blocksize up to 128");

  if (!n) {
    for (j=0; j<bs; j++) {
      max[j] = PETSC_MIN;
    }
  } else {
    for (j=0; j<bs; j++) {
#if defined(PETSC_USE_COMPLEX)
      max[j] = PetscRealPart(x[j]);
#else
      max[j] = x[j];
#endif
    }
    for (i=bs; i<n; i+=bs) {
      for (j=0; j<bs; j++) {
#if defined(PETSC_USE_COMPLEX)
	if ((tmp = PetscRealPart(x[i+j])) > max[j]) { max[j] = tmp;}
#else
	if ((tmp = x[i+j]) > max[j]) { max[j] = tmp; } 
#endif
      }
    }
  }
  ierr   = MPI_Allreduce(max,nrm,bs,MPIU_REAL,MPI_MAX,comm);CHKERRQ(ierr);

  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideMinAll"
/*@
   VecStrideMinAll - Computes the minimum of subvector of a vector defined 
   by a starting point and a stride and optionally its location.

   Collective on Vec

   Input Parameter:
.  v - the vector 

   Output Parameter:
+  idex - the location where the minimum occurred (not supported, pass PETSC_NULL,
           if you need this, send mail to petsc-maint@mcs.anl.gov to request it)
-  nrm - the minimums

   Level: advanced

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   This is useful for computing, say the minimum of the pressure variable when
   the pressure is stored (interlaced) with other variables, e.g., density, etc.
   This will only work if the desire subvector is a stride subvector.

   Concepts: minimum^on stride of vector
   Concepts: stride^minimum

.seealso: VecMin(), VecStrideNorm(), VecStrideGather(), VecStrideScatter(), VecStrideMax()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideMinAll(Vec v,PetscInt idex[],PetscReal nrm[])
{
  PetscErrorCode ierr;
  PetscInt       i,n,bs,j;
  PetscScalar    *x;
  PetscReal      min[128],tmp;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidDoublePointer(nrm,3);
  if (idex) {
    SETERRQ(PETSC_ERR_SUP,"No support yet for returning index; send mail to petsc-maint@mcs.anl.gov asking for it");
  }
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)v,&comm);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (bs > 128) SETERRQ(PETSC_ERR_SUP,"Currently supports only blocksize up to 128");

  if (!n) {
    for (j=0; j<bs; j++) {
      min[j] = PETSC_MAX;
    }
  } else {
    for (j=0; j<bs; j++) {
#if defined(PETSC_USE_COMPLEX)
      min[j] = PetscRealPart(x[j]);
#else
      min[j] = x[j];
#endif
    }
    for (i=bs; i<n; i+=bs) {
      for (j=0; j<bs; j++) {
#if defined(PETSC_USE_COMPLEX)
	if ((tmp = PetscRealPart(x[i+j])) < min[j]) { min[j] = tmp;}
#else
	if ((tmp = x[i+j]) < min[j]) { min[j] = tmp; } 
#endif
      }
    }
  }
  ierr   = MPI_Allreduce(min,nrm,bs,MPIU_REAL,MPI_MIN,comm);CHKERRQ(ierr);

  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*----------------------------------------------------------------------------------------------*/
#undef __FUNCT__  
#define __FUNCT__ "VecStrideGatherAll"
/*@
   VecStrideGatherAll - Gathers all the single components from a multi-component vector into
   separate vectors.

   Collective on Vec

   Input Parameter:
+  v - the vector 
-  addv - one of ADD_VALUES,INSERT_VALUES,MAX_VALUES

   Output Parameter:
.  s - the location where the subvectors are stored

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   If x is the array representing the vector x then this gathers
   the arrays (x[start],x[start+stride],x[start+2*stride], ....)
   for start=0,1,2,...bs-1

   The parallel layout of the vector and the subvector must be the same;
   i.e., nlocal of v = stride*(nlocal of s) 

   Not optimized; could be easily

   Level: advanced

   Concepts: gather^into strided vector

.seealso: VecStrideNorm(), VecStrideScatter(), VecStrideMin(), VecStrideMax(), VecStrideGather(),
          VecStrideScatterAll()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideGatherAll(Vec v,Vec s[],InsertMode addv)
{
  PetscErrorCode ierr;
  PetscInt       i,n,n2,bs,j,k,*bss = PETSC_NULL,nv,jj,nvc;
  PetscScalar    *x,**y;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidPointer(s,2);
  PetscValidHeaderSpecific(*s,VEC_COOKIE,2);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetLocalSize(s[0],&n2);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  bs   = v->map->bs;
  if (bs < 0) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Input vector does not have a valid blocksize set");

  ierr = PetscMalloc2(bs,PetscReal*,&y,bs,PetscInt,&bss);CHKERRQ(ierr);
  nv   = 0;
  nvc  = 0;
  for (i=0; i<bs; i++) {
    ierr = VecGetBlockSize(s[i],&bss[i]);CHKERRQ(ierr);
    if (bss[i] < 1) bss[i] = 1; /* if user never set it then assume 1  Re: [PETSC #8241] VecStrideGatherAll */
    ierr = VecGetArray(s[i],&y[i]);CHKERRQ(ierr);
    nvc  += bss[i];
    nv++;
    if (nvc > bs)  SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of subvectors in subvectors > number of vectors in main vector");
    if (nvc == bs) break;
  }

  n =  n/bs;

  jj = 0;
  if (addv == INSERT_VALUES) {
    for (j=0; j<nv; j++) {
      for (k=0; k<bss[j]; k++) {
	for (i=0; i<n; i++) {
	  y[j][i*bss[j] + k] = x[bs*i+jj+k];
        }
      }
      jj += bss[j];
    }
  } else if (addv == ADD_VALUES) {
    for (j=0; j<nv; j++) {
      for (k=0; k<bss[j]; k++) {
	for (i=0; i<n; i++) {
	  y[j][i*bss[j] + k] += x[bs*i+jj+k];
        }
      }
      jj += bss[j];
    }
#if !defined(PETSC_USE_COMPLEX)
  } else if (addv == MAX_VALUES) {
    for (j=0; j<nv; j++) {
      for (k=0; k<bss[j]; k++) {
	for (i=0; i<n; i++) {
	  y[j][i*bss[j] + k] = PetscMax(y[j][i*bss[j] + k],x[bs*i+jj+k]);
        }
      }
      jj += bss[j];
    }
#endif
  } else {
    SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown insert type");
  }

  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  for (i=0; i<nv; i++) {
    ierr = VecRestoreArray(s[i],&y[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree2(y,bss);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideScatterAll"
/*@
   VecStrideScatterAll - Scatters all the single components from separate vectors into 
     a multi-component vector.

   Collective on Vec

   Input Parameter:
+  s - the location where the subvectors are stored
-  addv - one of ADD_VALUES,INSERT_VALUES,MAX_VALUES

   Output Parameter:
.  v - the multicomponent vector 

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   The parallel layout of the vector and the subvector must be the same;
   i.e., nlocal of v = stride*(nlocal of s) 

   Not optimized; could be easily

   Level: advanced

   Concepts:  scatter^into strided vector

.seealso: VecStrideNorm(), VecStrideScatter(), VecStrideMin(), VecStrideMax(), VecStrideGather(),
          VecStrideScatterAll()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideScatterAll(Vec s[],Vec v,InsertMode addv)
{
  PetscErrorCode ierr;
  PetscInt        i,n,n2,bs,j,jj,k,*bss = PETSC_NULL,nv,nvc;
  PetscScalar     *x,**y;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidPointer(s,2);
  PetscValidHeaderSpecific(*s,VEC_COOKIE,2);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetLocalSize(s[0],&n2);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  bs   = v->map->bs;
  if (bs < 0) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Input vector does not have a valid blocksize set");

  ierr = PetscMalloc2(bs,PetscScalar**,&y,bs,PetscInt,&bss);CHKERRQ(ierr);
  nv  = 0;
  nvc = 0;
  for (i=0; i<bs; i++) {
    ierr = VecGetBlockSize(s[i],&bss[i]);CHKERRQ(ierr);
    if (bss[i] < 1) bss[i] = 1; /* if user never set it then assume 1  Re: [PETSC #8241] VecStrideGatherAll */
    ierr = VecGetArray(s[i],&y[i]);CHKERRQ(ierr);
    nvc  += bss[i];
    nv++;
    if (nvc > bs)  SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of subvectors in subvectors > number of vectors in main vector");
    if (nvc == bs) break;
  }

  n =  n/bs;

  jj = 0;
  if (addv == INSERT_VALUES) {
    for (j=0; j<nv; j++) {
      for (k=0; k<bss[j]; k++) {
	for (i=0; i<n; i++) {
	  x[bs*i+jj+k] = y[j][i*bss[j] + k];
        }
      }
      jj += bss[j];
    }
  } else if (addv == ADD_VALUES) {
    for (j=0; j<nv; j++) {
      for (k=0; k<bss[j]; k++) {
	for (i=0; i<n; i++) {
	  x[bs*i+jj+k] += y[j][i*bss[j] + k];
        }
      }
      jj += bss[j];
    }
#if !defined(PETSC_USE_COMPLEX)
  } else if (addv == MAX_VALUES) {
    for (j=0; j<nv; j++) {
      for (k=0; k<bss[j]; k++) {
	for (i=0; i<n; i++) {
	  x[bs*i+jj+k] = PetscMax(x[bs*i+jj+k],y[j][i*bss[j] + k]);
        }
      }
      jj += bss[j];
    }
#endif
  } else {
    SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown insert type");
  }

  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  for (i=0; i<nv; i++) {
    ierr = VecRestoreArray(s[i],&y[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree2(y,bss);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideGather"
/*@
   VecStrideGather - Gathers a single component from a multi-component vector into
   another vector.

   Collective on Vec

   Input Parameter:
+  v - the vector 
.  start - starting point of the subvector (defined by a stride)
-  addv - one of ADD_VALUES,INSERT_VALUES,MAX_VALUES

   Output Parameter:
.  s - the location where the subvector is stored

   Notes:
   One must call VecSetBlockSize() before this routine to set the stride 
   information, or use a vector created from a multicomponent DA.

   If x is the array representing the vector x then this gathers
   the array (x[start],x[start+stride],x[start+2*stride], ....)

   The parallel layout of the vector and the subvector must be the same;
   i.e., nlocal of v = stride*(nlocal of s) 

   Not optimized; could be easily

   Level: advanced

   Concepts: gather^into strided vector

.seealso: VecStrideNorm(), VecStrideScatter(), VecStrideMin(), VecStrideMax(), VecStrideGatherAll(),
          VecStrideScatterAll()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideGather(Vec v,PetscInt start,Vec s,InsertMode addv)
{
  PetscErrorCode ierr;
  PetscInt       i,n,bs,ns;
  PetscScalar    *x,*y;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidHeaderSpecific(s,VEC_COOKIE,3);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetLocalSize(s,&ns);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetArray(s,&y);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (start < 0) {
    SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative start %D",start);
  } else if (start >= bs) {
    SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Start of stride subvector (%D) is too large for stride\n\
            Have you set the vector blocksize (%D) correctly with VecSetBlockSize()?",start,bs);
  }
  if (n != ns*bs) {
    SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Subvector length * blocksize %D not correct for gather from original vector %D",ns*bs,n);
  }
  x += start;
  n =  n/bs;

  if (addv == INSERT_VALUES) {
    for (i=0; i<n; i++) {
      y[i] = x[bs*i];
    }
  } else if (addv == ADD_VALUES) {
    for (i=0; i<n; i++) {
      y[i] += x[bs*i];
    }
#if !defined(PETSC_USE_COMPLEX)
  } else if (addv == MAX_VALUES) {
    for (i=0; i<n; i++) {
      y[i] = PetscMax(y[i],x[bs*i]);
    }
#endif
  } else {
    SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown insert type");
  }

  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(s,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecStrideScatter"
/*@
   VecStrideScatter - Scatters a single component from a vector into a multi-component vector.

   Collective on Vec

   Input Parameter:
+  s - the single-component vector 
.  start - starting point of the subvector (defined by a stride)
-  addv - one of ADD_VALUES,INSERT_VALUES,MAX_VALUES

   Output Parameter:
.  v - the location where the subvector is scattered (the multi-component vector)

   Notes:
   One must call VecSetBlockSize() on the multi-component vector before this
   routine to set the stride  information, or use a vector created from a multicomponent DA.

   The parallel layout of the vector and the subvector must be the same;
   i.e., nlocal of v = stride*(nlocal of s) 

   Not optimized; could be easily

   Level: advanced

   Concepts: scatter^into strided vector

.seealso: VecStrideNorm(), VecStrideGather(), VecStrideMin(), VecStrideMax(), VecStrideGatherAll(),
          VecStrideScatterAll()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecStrideScatter(Vec s,PetscInt start,Vec v,InsertMode addv)
{
  PetscErrorCode ierr;
  PetscInt       i,n,bs,ns;
  PetscScalar    *x,*y;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidHeaderSpecific(s,VEC_COOKIE,3);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetLocalSize(s,&ns);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetArray(s,&y);CHKERRQ(ierr);

  bs   = v->map->bs;
  if (start < 0) {
    SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative start %D",start);
  } else if (start >= bs) {
    SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Start of stride subvector (%D) is too large for stride\n\
            Have you set the vector blocksize (%D) correctly with VecSetBlockSize()?",start,bs);
  }
  if (n != ns*bs) {
    SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Subvector length * blocksize %D not correct for scatter to multicomponent vector %D",ns*bs,n);
  }
  x += start;
  n =  n/bs;


  if (addv == INSERT_VALUES) {
    for (i=0; i<n; i++) {
      x[bs*i] = y[i];
    }
  } else if (addv == ADD_VALUES) {
    for (i=0; i<n; i++) {
      x[bs*i] += y[i];
    }
#if !defined(PETSC_USE_COMPLEX)
  } else if (addv == MAX_VALUES) {
    for (i=0; i<n; i++) {
      x[bs*i] = PetscMax(y[i],x[bs*i]);
    }
#endif
  } else {
    SETERRQ(PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown insert type");
  }


  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(s,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecReciprocal_Default"
PetscErrorCode VecReciprocal_Default(Vec v)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  PetscScalar    *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    if (x[i] != 0.0) x[i] = 1.0/x[i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecExp"
/*@
  VecExp - Replaces each component of a vector by e^x_i

  Not collective

  Input Parameter:
. v - The vector

  Output Parameter:
. v - The vector of exponents

  Level: beginner

.seealso:  VecLog(), VecAbs(), VecSqrt(), VecReciprocal()

.keywords: vector, sqrt, square root
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecExp(Vec v)
{
  PetscScalar    *x;
  PetscInt       i, n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v, VEC_COOKIE,1);
  if (v->ops->exp) {
    ierr = (*v->ops->exp)(v);CHKERRQ(ierr);
  } else {
    ierr = VecGetLocalSize(v, &n);CHKERRQ(ierr);
    ierr = VecGetArray(v, &x);CHKERRQ(ierr);
    for(i = 0; i < n; i++) {
      x[i] = PetscExpScalar(x[i]);
    }
    ierr = VecRestoreArray(v, &x);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecLog"
/*@
  VecLog - Replaces each component of a vector by log(x_i), the natural logarithm

  Not collective

  Input Parameter:
. v - The vector

  Output Parameter:
. v - The vector of logs

  Level: beginner

.seealso:  VecExp(), VecAbs(), VecSqrt(), VecReciprocal()

.keywords: vector, sqrt, square root
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecLog(Vec v)
{
  PetscScalar    *x;
  PetscInt       i, n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v, VEC_COOKIE,1);
  if (v->ops->log) {
    ierr = (*v->ops->log)(v);CHKERRQ(ierr);
  } else {
    ierr = VecGetLocalSize(v, &n);CHKERRQ(ierr);
    ierr = VecGetArray(v, &x);CHKERRQ(ierr);
    for(i = 0; i < n; i++) {
      x[i] = PetscLogScalar(x[i]);
    }
    ierr = VecRestoreArray(v, &x);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecSqrt"
/*@
  VecSqrt - Replaces each component of a vector by the square root of its magnitude.

  Not collective

  Input Parameter:
. v - The vector

  Output Parameter:
. v - The vector square root

  Level: beginner

  Note: The actual function is sqrt(|x_i|)

.seealso: VecLog(), VecExp(), VecReciprocal(), VecAbs()

.keywords: vector, sqrt, square root
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecSqrt(Vec v)
{
  PetscScalar    *x;
  PetscInt       i, n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v, VEC_COOKIE,1);
  if (v->ops->sqrt) {
    ierr = (*v->ops->sqrt)(v);CHKERRQ(ierr);
  } else {
    ierr = VecGetLocalSize(v, &n);CHKERRQ(ierr);
    ierr = VecGetArray(v, &x);CHKERRQ(ierr);
    for(i = 0; i < n; i++) {
      x[i] = sqrt(PetscAbsScalar(x[i]));
    }
    ierr = VecRestoreArray(v, &x);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecDotNorm2"
/*@
  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector

  Collective on Vec

  Input Parameter:
+ s - first vector
- t - second vector

  Output Parameter:
+ dp - s't
- nm - t't

  Level: advanced

.seealso:   VecDot(), VecNorm(), VecDotBegin(), VecNormBegin(), VecDotEnd(), VecNormEnd()

.keywords: vector, sqrt, square root
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecDotNorm2(Vec s,Vec t,PetscScalar *dp, PetscScalar *nm)
{
  PetscScalar    *sx, *tx, dpx = 0.0, nmx = 0.0,work[2],sum[2];
  PetscInt       i, n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(s, VEC_COOKIE,1);
  PetscValidHeaderSpecific(t, VEC_COOKIE,2);
  PetscValidScalarPointer(dp,3);
  PetscValidScalarPointer(nm,4);
  PetscValidType(s,1);
  PetscValidType(t,2);
  PetscCheckSameTypeAndComm(s,1,t,2);
  if (s->map->N != t->map->N) SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible vector global lengths");
  if (s->map->n != t->map->n) SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible vector local lengths");

  ierr = PetscLogEventBarrierBegin(VEC_DotNormBarrier,s,t,0,0,((PetscObject)s)->comm);CHKERRQ(ierr);
  if (s->ops->dotnorm2) {
    ierr = (*s->ops->dotnorm2)(s,t,dp,nm);CHKERRQ(ierr);
  } else {
    ierr = VecGetLocalSize(s, &n);CHKERRQ(ierr);
    ierr = VecGetArray(s, &sx);CHKERRQ(ierr);
    ierr = VecGetArray(t, &tx);CHKERRQ(ierr);

    for (i = 0; i<n; i++) {
      dpx += sx[i]*PetscConj(tx[i]);
      nmx += tx[i]*PetscConj(tx[i]);
    }
    work[0] = dpx;
    work[1] = nmx;
    ierr = MPI_Allreduce(&work,&sum,2,MPIU_SCALAR,MPIU_SUM,((PetscObject)s)->comm);CHKERRQ(ierr);
    *dp  = sum[0];
    *nm  = sum[1];

    ierr = VecRestoreArray(t, &tx);CHKERRQ(ierr);
    ierr = VecRestoreArray(s, &sx);CHKERRQ(ierr);
    ierr = PetscLogFlops(4.0*n);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBarrierEnd(VEC_DotNormBarrier,s,t,0,0,((PetscObject)s)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecSum"
/*@
   VecSum - Computes the sum of all the components of a vector.

   Collective on Vec

   Input Parameter:
.  v - the vector 

   Output Parameter:
.  sum - the result

   Level: beginner

   Concepts: sum^of vector entries

.seealso: VecNorm()
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecSum(Vec v,PetscScalar *sum)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  PetscScalar    *x,lsum = 0.0;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  PetscValidScalarPointer(sum,2);
  ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    lsum += x[i];
  }
  ierr = MPI_Allreduce(&lsum,sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)v)->comm);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecShift"
/*@
   VecShift - Shifts all of the components of a vector by computing
   x[i] = x[i] + shift.

   Collective on Vec

   Input Parameters:
+  v - the vector 
-  shift - the shift

   Output Parameter:
.  v - the shifted vector 

   Level: intermediate

   Concepts: vector^adding constant

@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecShift(Vec v,PetscScalar shift)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  PetscScalar    *x;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  if (v->ops->shift) {
    ierr = (*v->ops->shift)(v);CHKERRQ(ierr);
  } else {
    ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 
    ierr = VecGetArray(v,&x);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x[i] += shift;
    }
    ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecAbs"
/*@
   VecAbs - Replaces every element in a vector with its absolute value.

   Collective on Vec

   Input Parameters:
.  v - the vector 

   Level: intermediate

   Concepts: vector^absolute value

@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecAbs(Vec v)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  PetscScalar    *x;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v,VEC_COOKIE,1);
  if (v->ops->abs) {
    ierr = (*v->ops->abs)(v);CHKERRQ(ierr);
  } else {
    ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
    ierr = VecGetArray(v,&x);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x[i] = PetscAbsScalar(x[i]);
    }
    ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecPermute"
/*@
  VecPermute - Permutes a vector in place using the given ordering.

  Input Parameters:
+ vec   - The vector
. order - The ordering
- inv   - The flag for inverting the permutation

  Level: beginner

  Note: This function does not yet support parallel Index Sets

.seealso: MatPermute()
.keywords: vec, permute
@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecPermute(Vec x, IS row, PetscTruth inv)
{
  PetscScalar    *array, *newArray;
  const PetscInt *idx;
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = ISGetIndices(row, &idx);CHKERRQ(ierr);
  ierr = VecGetArray(x, &array);CHKERRQ(ierr);
  ierr = PetscMalloc(x->map->n*sizeof(PetscScalar), &newArray);CHKERRQ(ierr);
#ifdef PETSC_USE_DEBUG
  for(i = 0; i < x->map->n; i++) {
    if ((idx[i] < 0) || (idx[i] >= x->map->n)) {
      SETERRQ2(PETSC_ERR_ARG_CORRUPT, "Permutation index %D is out of bounds: %D", i, idx[i]);
    }
  }
#endif
  if (!inv) {
    for(i = 0; i < x->map->n; i++) newArray[i]      = array[idx[i]];
  } else {
    for(i = 0; i < x->map->n; i++) newArray[idx[i]] = array[i];
  }
  ierr = VecRestoreArray(x, &array);CHKERRQ(ierr);
  ierr = ISRestoreIndices(row, &idx);CHKERRQ(ierr);
  ierr = VecReplaceArray(x, newArray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "VecEqual"
/*@
   VecEqual - Compares two vectors.

   Collective on Vec

   Input Parameters:
+  vec1 - the first vector
-  vec2 - the second vector

   Output Parameter:
.  flg - PETSC_TRUE if the vectors are equal; PETSC_FALSE otherwise.

   Level: intermediate

   Concepts: equal^two vectors
   Concepts: vector^equality

@*/
PetscErrorCode PETSCVEC_DLLEXPORT VecEqual(Vec vec1,Vec vec2,PetscTruth *flg)
{
  PetscScalar    *v1,*v2; 
  PetscErrorCode ierr; 
  PetscInt       n1,n2,N1,N2; 
  PetscInt       state1,state2; 
  PetscTruth     flg1; 
 
  PetscFunctionBegin; 
  PetscValidHeaderSpecific(vec1,VEC_COOKIE,1); 
  PetscValidHeaderSpecific(vec2,VEC_COOKIE,2); 
  PetscValidPointer(flg,3); 
  if (vec1 == vec2) { 
    *flg = PETSC_TRUE; 
  } else { 
    ierr = VecGetSize(vec1,&N1);CHKERRQ(ierr); 
    ierr = VecGetSize(vec2,&N2);CHKERRQ(ierr); 
    if (N1 != N2) { 
      flg1 = PETSC_FALSE; 
    } else { 
      ierr = VecGetLocalSize(vec1,&n1);CHKERRQ(ierr); 
      ierr = VecGetLocalSize(vec2,&n2);CHKERRQ(ierr); 
      if (n1 != n2) { 
        flg1 = PETSC_FALSE; 
      } else { 
        ierr = PetscObjectStateQuery((PetscObject) vec1,&state1);CHKERRQ(ierr); 
        ierr = PetscObjectStateQuery((PetscObject) vec2,&state2);CHKERRQ(ierr); 
        ierr = VecGetArray(vec1,&v1);CHKERRQ(ierr); 
        ierr = VecGetArray(vec2,&v2);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
        {
          PetscInt k;
          flg1 = PETSC_TRUE;
          for (k=0; k<n1; k++){
            if (PetscRealPart(v1[k]) != PetscRealPart(v2[k]) || PetscImaginaryPart(v1[k]) != PetscImaginaryPart(v2[k])){
              flg1 = PETSC_FALSE;
              break;
            }
          }
        }
#else 
        ierr = PetscMemcmp(v1,v2,n1*sizeof(PetscScalar),&flg1);CHKERRQ(ierr); 
#endif
        ierr = VecRestoreArray(vec1,&v1);CHKERRQ(ierr); 
        ierr = VecRestoreArray(vec2,&v2);CHKERRQ(ierr); 
        ierr = PetscObjectSetState((PetscObject) vec1,state1);CHKERRQ(ierr); 
        ierr = PetscObjectSetState((PetscObject) vec2,state2);CHKERRQ(ierr); 
      } 
    } 
    /* combine results from all processors */ 
    ierr = MPI_Allreduce(&flg1,flg,1,MPI_INT,MPI_MIN,((PetscObject)vec1)->comm);CHKERRQ(ierr); 
  } 
  PetscFunctionReturn(0); 
}



